*****************************************************************
*This file builds vars on CZ-to-CZ migration from the Census/ACS*
*****************************************************************

use "$clean_data_lmarket/czone1990_StudentMigrationFlowsCombi.dta", clear

*Build the skeleton: every (fromczone, toczone) pair appears twice,
*once tagged year==1990 and once tagged year==2000
keep fromczone toczone
expand 2, generate(second_copy)
gen year = cond(second_copy, 2000, 1990)
drop second_copy

*Attach the 1990, 2000 and 2008 flow files to every row of the skeleton.
*assert(3) aborts the run unless every pair is matched in each file.
foreach wave in 1990 2000 2008 {
	merge m:1 fromczone toczone using "$clean_data_lmarket/czone`wave'_StudentMigrationFlowsCombi.dta", assert(3) nogenerate
}

*Keep variables of interest: czone identifiers, year and the age 19-64 measures
keep *czone year *a19_64*

*Population in czone of destination:
*Sum of all non-migrants and migrants in this year over all "fromczone".
*Each (fromczone, toczone) pair occurs once per value of the variable year, and the
*wide _1990/_2000/_2008 measures are repeated on both rows, so summing within
*(toczone, year) counts every origin exactly once.
*Totals are requested as double instead of relying on the default storage type:
*float holds only about 7 significant digits, too few for large population counts,
*and these totals are later used as denominators.
*total() is the documented name of the egen function formerly called sum().
foreach year in 1990 2000 2008 {

	*Population
	bysort toczone year: egen double ipums_pop_toczone_a19_64_`year' = total(ipums_pop_a19_64_`year')

	*Students
	bysort toczone year: egen double ipums_pop_sc_toczone_a19_64_`year' = total(ipums_pop_sc_a19_64_`year')
}

*************************
*		Population		*
*************************

*Normalized (to 1 year) from Census
*Need to normalize migrants (from 5 years to 1 year) and reassign them to non-movers.
*Two variants are built: migrants divided by 4 (N4) and by 5 (N5).
*All generated counts are stored as double: with the default storage type (float
*unless changed) only about 7 significant digits are kept, which is the
*"approximation" the checks below have to tolerate.
foreach year in 1990 2000 {

	*1) Accumulated migrants as reported: the flow for movers, 0 for non-migrants
	gen double ipums_m_a19_64_`year' = ipums_pop_a19_64_`year' if toczone!=fromczone
	replace ipums_m_a19_64_`year' = 0 if toczone==fromczone

	*2) Number of migrants arriving in the destination (toczone), unscaled
	bysort toczone year: egen double ipums_m_toczone_a19_64_`year' = total(ipums_m_a19_64_`year')

	foreach n in 4 5 {

		*1) Migrants divided by `n' (0/`n' keeps non-migrants at 0)
		gen double ipums_m_a19_64_N`n'_`year' = ipums_m_a19_64_`year'/`n'

		*2) Number of scaled migrants arriving in the destination
		bysort toczone year: egen double ipums_m_toczone_a19_64_N`n'_`year' = total(ipums_m_a19_64_N`n'_`year')

		*3) Movers keep the scaled flow; the difference between unscaled and scaled
		*   arrivals is added to the non-migrants of the destination
		gen double ipums_pop_a19_64_N`n'_`year' = ipums_m_a19_64_N`n'_`year' if toczone!=fromczone
		replace ipums_pop_a19_64_N`n'_`year' = ipums_pop_a19_64_`year' + (ipums_m_toczone_a19_64_`year'-ipums_m_toczone_a19_64_N`n'_`year') if toczone==fromczone

		*Check that everything correct (same total population as before, just shifted some of the migrants to non-migrants in 1990 and 2000)
		bysort toczone year: egen double ipums_pop_toczone_a19_64_N`n'_`year' = total(ipums_pop_a19_64_N`n'_`year')
		assert abs(ipums_pop_toczone_a19_64_N`n'_`year'-ipums_pop_toczone_a19_64_`year')<10 //if error, then it is because of approximation
	}
}

*************************
*		Students		*
*************************

*Normalized (to 1 year) from Census
*Need to normalize migrants (from 5 years to 1 year) and reassign them to non-movers.
*Same procedure as for the total population, applied to the student (_sc) counts.
*Helper variables use the shortened prefixes ip_m_sc_ / ip_p_sc_.
*All generated counts are stored as double: with the default storage type (float
*unless changed) only about 7 significant digits are kept, which is the
*"approximation" the checks below have to tolerate.
foreach year in 1990 2000 {

	*1) Accumulated student migrants as reported: the flow for movers, 0 for non-migrants
	gen double ipums_m_sc_a19_64_`year' = ipums_pop_sc_a19_64_`year' if toczone!=fromczone
	replace ipums_m_sc_a19_64_`year' = 0 if toczone==fromczone

	*2) Number of student migrants arriving in the destination (toczone), unscaled
	bysort toczone year: egen double ip_m_sc_toczone_a19_64_`year' = total(ipums_m_sc_a19_64_`year')

	foreach n in 4 5 {

		*1) Student migrants divided by `n' (0/`n' keeps non-migrants at 0)
		gen double ipums_m_sc_a19_64_N`n'_`year' = ipums_m_sc_a19_64_`year'/`n'

		*2) Number of scaled student migrants arriving in the destination
		bysort toczone year: egen double ip_m_sc_toczone_a19_64_N`n'_`year' = total(ipums_m_sc_a19_64_N`n'_`year')

		*3) Movers keep the scaled flow; the difference between unscaled and scaled
		*   arrivals is added to the non-migrants of the destination
		gen double ipums_pop_sc_a19_64_N`n'_`year' = ipums_m_sc_a19_64_N`n'_`year' if toczone!=fromczone
		replace ipums_pop_sc_a19_64_N`n'_`year' = ipums_pop_sc_a19_64_`year' + (ip_m_sc_toczone_a19_64_`year'-ip_m_sc_toczone_a19_64_N`n'_`year') if toczone==fromczone

		*Check that everything correct (same total student count as before, just shifted some of the migrants to non-migrants in 1990 and 2000)
		bysort toczone year: egen double ip_p_sc_toczone_a19_64_N`n'_`year' = total(ipums_pop_sc_a19_64_N`n'_`year')
		assert abs(ip_p_sc_toczone_a19_64_N`n'_`year'-ipums_pop_sc_toczone_a19_64_`year')<10 //if error, then it is because of approximation
	}
}

*Retain the identifiers, the normalized 1990/2000 counts (N4, N5), the raw 2008
*counts and the destination population totals; every helper variable is dropped
keep fromczone toczone year ///
	ipums_pop_a19_64_N?_???? ipums_pop_a19_64_2008 ///
	ipums_pop_sc_a19_64_N?_???? ipums_pop_sc_a19_64_2008 ///
	ipums_pop_toczone_a19_64_????

*Shares: each flow cell divided by the total population of its destination czone.
*1990 and 2000 use the normalized counts, once per variant (N4, N5)
foreach year in 1990 2000 {
	local denom ipums_pop_toczone_a19_64_`year'

	foreach grp in Students Pop {
		*Students are read from the _sc counts, Pop from the overall counts
		local stub = cond("`grp'"=="Students", "ipums_pop_sc_a19_64", "ipums_pop_a19_64")

		foreach n in 4 5 {
			gen share`grp'N`n'_`year' = `stub'_N`n'_`year'/`denom'
		}
	}
}

*2008 uses the counts as merged in (no N4/N5 variants exist for this year)
local denom ipums_pop_toczone_a19_64_2008
gen shareStudents_2008 = ipums_pop_sc_a19_64_2008/`denom'
gen sharePop_2008 = ipums_pop_a19_64_2008/`denom'

*Changes in shares, in percentage points (x100), one variable per variant and measure:
*  rows with year==1990 hold the 1990->2000 change, scaled by 0.7
*  rows with year==2000 hold the 2000->2008 change, using the unnormalized 2008 share
*NOTE(review): the 0.7 factor presumably rescales the ten-year change to the length
*of the later period - confirm the intended period length.
foreach type in N4 N5 {
	foreach var in shareStudents sharePop {
		local s1990 `var'`type'_1990
		local s2000 `var'`type'_2000
		local s2008 `var'_2008

		gen d_`var'`type' = 0.7*100*(`s2000'-`s1990') if year == 1990
		replace d_`var'`type' = 100*(`s2008'-`s2000') if year == 2000
	}
}

save "$final_data_outcomes/czone_StudentMigrantsCombi.dta", replace


